
Conversation

@Xu-Wenqing (Contributor) commented Sep 2, 2025

Purpose

vLLM LongCat-Flash-Chat model support: #23991

This PR adds tool call support for the LongCat-Flash-Chat model.

Test Plan

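For the test scripts below to work, the vLLM server has to be launched with automatic tool choice and a matching tool-call parser. A hedged sketch of the serve command (the parser name `longcat` and the model path are assumptions based on this PR, not confirmed values; check the merged vLLM docs for the exact flags):

```shell
# Sketch only: --enable-auto-tool-choice and --tool-call-parser are existing
# vLLM serve flags; the parser name "longcat" is assumed from this PR.
vllm serve meituan-longcat/LongCat-Flash-Chat \
    --enable-auto-tool-choice \
    --tool-call-parser longcat
```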
Test Script (Streaming):

from openai import OpenAI

openai_api_base = ""  # fill in the vLLM server address
openai_api_key = ""   # fill in the API key, if any

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base + "/v1",
)

class bcolors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'


tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_temperature",
            "description": "Get current temperature at a location.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": 'The location to get the temperature for, in the format "City, State, Country".',
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": 'The unit to return the temperature in. Defaults to "celsius".',
                    },
                },
                "required": ["location"],
            },
            "strict": True
        },
    },
    {
        "type": "function",
        "function": {
            "name": "get_temperature_date",
            "description": "Get temperature at a location and date.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": 'The location to get the temperature for, in the format "City, State, Country".',
                    },
                    "date": {
                        "type": "string",
                        "description": 'The date to get the temperature for, in the format "Year-Month-Day".',
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": 'The unit to return the temperature in. Defaults to "celsius".',
                    },
                },
                "required": ["location", "date"],
            },
        },
    },
]


tool_calls_stream = client.chat.completions.create(
    model=client.models.list().data[0].id,
    messages=[
        {
            "role": "system",
            "content": "现在的日期是: 2024-09-30",  # "The current date is: 2024-09-30"
        },
        {
            "role": "user",
            "content": "北京今天的天气如何?明天呢?",  # "What's the weather in Beijing today? And tomorrow?"
        },
    ],
    tools=tools,
    tool_choice="auto",
    stream=True,
    # extra_body={"chat_template_kwargs": {"thinking": True}},
    max_completion_tokens=8192
)

print("reasoning content(Blue) and content(Green):")
chunks = []
for chunk in tool_calls_stream:
    chunks.append(chunk)
    delta = chunk.choices[0].delta
    # reasoning_content is a vLLM extension and may be absent or None.
    reasoning_content = getattr(delta, "reasoning_content", None)
    if reasoning_content:
        print(bcolors.OKBLUE + reasoning_content, end="", flush=True)
    elif delta.content:
        print(bcolors.OKGREEN + delta.content, end="", flush=True)

print(bcolors.ENDC + "\n### end of reasoning content and content. ###\n")

arguments = []
tool_call_idx = -1
for chunk in chunks:
    if chunk.choices[0].delta.tool_calls:
        tool_call = chunk.choices[0].delta.tool_calls[0]

        if tool_call.index != tool_call_idx:
            if tool_call_idx >= 0:
                print(f"streamed tool call arguments: {arguments[tool_call_idx]}")
            tool_call_idx = chunk.choices[0].delta.tool_calls[0].index
            arguments.append("")
        if tool_call.id:
            print(f"streamed tool call id: {tool_call.id} ")

        if tool_call.function:
            if tool_call.function.name:
                print(f"streamed tool call name: {tool_call.function.name}")

            if tool_call.function.arguments:
                arguments[tool_call_idx] += tool_call.function.arguments

if arguments:
    print(f"streamed tool call arguments: {arguments[-1]}")

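The argument fragments accumulated by the loop above are plain JSON text; once a call is complete they can be parsed and dispatched to a local implementation. A minimal sketch of that step (the handler functions and `TOOL_REGISTRY` table are hypothetical stand-ins, not part of this PR):

```python
import json

# Hypothetical local implementations of the two tools defined above;
# the returned values are made up for illustration.
def get_current_temperature(location, unit="celsius"):
    return {"location": location, "temperature": 26.1, "unit": unit}

def get_temperature_date(location, date, unit="celsius"):
    return {"location": location, "date": date, "temperature": 25.9, "unit": unit}

TOOL_REGISTRY = {
    "get_current_temperature": get_current_temperature,
    "get_temperature_date": get_temperature_date,
}

def dispatch(name, arguments_json):
    """Parse an accumulated argument string and call the matching tool."""
    args = json.loads(arguments_json)
    return TOOL_REGISTRY[name](**args)

print(dispatch("get_current_temperature", '{"location": "Beijing, China"}'))
# → {'location': 'Beijing, China', 'temperature': 26.1, 'unit': 'celsius'}
```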
Test Script (Non-Streaming):

from openai import OpenAI

openai_api_base = ""  # fill in the vLLM server address
openai_api_key = ""   # fill in the API key, if any

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base + "/v1",
)


tools = [
    {
        "type": "function",
        "function": {
            "strict": True,
            "name": "get_current_temperature",
            "description": "Get current temperature at a location.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": 'The location to get the temperature for, in the format "City, State, Country".',
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": 'The unit to return the temperature in. Defaults to "celsius".',
                    },
                },
                "required": ["location"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "get_temperature_date",
            "description": "Get temperature at a location and date.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": 'The location to get the temperature for, in the format "City, State, Country".',
                    },
                    "date": {
                        "type": "string",
                        "description": 'The date to get the temperature for, in the format "Year-Month-Day".',
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": 'The unit to return the temperature in. Defaults to "celsius".',
                    },
                },
                "required": ["location", "date"],
            },
        },
    },
]


response = client.chat.completions.create(
    model=client.models.list().data[0].id,
    messages=[
        {
            "role": "system",
            "content": "现在的日期是: 2024-09-30",  # "The current date is: 2024-09-30"
        },
        {
            "role": "user",
            "content": "北京今天的天气如何?明天呢?",  # "What's the weather in Beijing today? And tomorrow?"
        },
    ],
    tools=tools,
    tool_choice="auto",
    stream=False,
)

print(response)
tool_calls = response.choices[0].message.tool_calls
for c in tool_calls:
    print(c.function.name, c.function.arguments)

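After the model returns tool calls like the ones printed above, the usual follow-up is to execute each tool locally and append the results as role="tool" messages before a second completions call. A minimal sketch of building that follow-up message list (the call id and result values are made up; no server request is made here):

```python
import json

def build_followup_messages(messages, assistant_message, tool_results):
    """Append the assistant's tool calls plus one role="tool" message per result.

    `tool_results` maps tool_call id -> Python object to send back to the model.
    """
    followup = list(messages)
    followup.append({
        "role": "assistant",
        "tool_calls": assistant_message["tool_calls"],
    })
    for call in assistant_message["tool_calls"]:
        followup.append({
            "role": "tool",
            "tool_call_id": call["id"],
            "content": json.dumps(tool_results[call["id"]]),
        })
    return followup

messages = [{"role": "user", "content": "What's the weather in Beijing today?"}]
assistant = {
    "tool_calls": [{
        "id": "chatcmpl-tool-1",
        "type": "function",
        "function": {"name": "get_current_temperature",
                     "arguments": '{"location": "Beijing, China"}'},
    }]
}
followup = build_followup_messages(
    messages, assistant,
    {"chatcmpl-tool-1": {"temperature": 26.1, "unit": "celsius"}})
print(len(followup))  # → 3 (user + assistant + one tool message)
```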
Test Result

Test Result (Streaming):

reasoning content(Blue) and content(Green):

### end of reasoning content and content. ###

streamed tool call id: chatcmpl-tool-573a577d3b8c4da2bc51e7cd0575bf9e 
streamed tool call name: get_current_temperature
streamed tool call arguments: {"location": "北京, 中国"
streamed tool call id: chatcmpl-tool-4976a639656c4db29282f9b089d15f4c 
streamed tool call name: get_temperature_date
streamed tool call arguments: {"location": "北京, 中国", "date": "2024-10-01"

Test Result (Non-Streaming):

ChatCompletion(id='chatcmpl-4ac67b27-9eca-4089-9477-f51b48c866fd', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageFunctionToolCall(id='chatcmpl-tool-b8069bd55afe48a8adc3be98a5fdfaa5', function=Function(arguments='{"location": "Beijing, China"}', name='get_current_temperature'), type='function')], reasoning_content=None), stop_reason=None, token_ids=None)], created=1756806588, model='LongCat-Flash-Chat', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=25, prompt_tokens=487, total_tokens=512, completion_tokens_details=None, prompt_tokens_details=None), prompt_logprobs=None, prompt_token_ids=None, kv_transfer_params=None)
get_current_temperature {"location": "Beijing, China"}

Essential Elements of an Effective PR Description Checklist
  • The purpose of the PR, such as "Fix some issue (link existing issues this PR will resolve)".
  • The test plan, such as providing test command.
  • The test results, such as pasting the results comparison before and after, or e2e results
  • (Optional) The necessary documentation update, such as updating supported_models.md and examples for a new model.
  • (Optional) Release notes update. If your change is user facing, please update the release notes draft in the Google Doc.

Signed-off-by: 许文卿 <xwq391974@alibaba-inc.com>
@mergify mergify bot added the `documentation`, `frontend`, and `tool-calling` labels Sep 2, 2025
@Xu-Wenqing Xu-Wenqing changed the title [WIP] Support LongCat-Flash-Chat tool call Support LongCat-Flash-Chat tool call Sep 2, 2025
@Xu-Wenqing Xu-Wenqing marked this pull request as ready for review September 2, 2025 09:50
@Xu-Wenqing (Contributor, Author)

Waiting for vLLM LongCat-Flash PR merge: #23991

@ghost commented Sep 3, 2025

Hope it can be merged soon.

@chaunceyjiang chaunceyjiang self-assigned this Sep 22, 2025
@Xu-Wenqing (Contributor, Author) commented Sep 25, 2025

@chaunceyjiang @aarnphm @hmellor the related PR: #23991 has been merged, could you please take a review again? Thanks.

@chaunceyjiang (Collaborator) left a comment

Thanks~

@chaunceyjiang chaunceyjiang added the `ready` label (ONLY add when PR is ready to merge/full CI is needed) Sep 26, 2025
@chaunceyjiang chaunceyjiang enabled auto-merge (squash) September 26, 2025 08:00
@chaunceyjiang chaunceyjiang merged commit b03b1b9 into vllm-project:main Sep 26, 2025
54 checks passed
pdasigi pushed a commit to pdasigi/vllm that referenced this pull request Oct 2, 2025
Signed-off-by: 许文卿 <xwq391974@alibaba-inc.com>
yewentao256 pushed a commit that referenced this pull request Oct 3, 2025
Signed-off-by: 许文卿 <xwq391974@alibaba-inc.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
xuebwang-amd pushed a commit to xuebwang-amd/vllm that referenced this pull request Oct 10, 2025
Signed-off-by: 许文卿 <xwq391974@alibaba-inc.com>
Signed-off-by: xuebwang-amd <xuebwang@amd.com>
choprahetarth pushed a commit to Tandemn-Labs/vllm that referenced this pull request Oct 11, 2025
Signed-off-by: 许文卿 <xwq391974@alibaba-inc.com>
lywa1998 pushed a commit to lywa1998/vllm that referenced this pull request Oct 20, 2025
Signed-off-by: 许文卿 <xwq391974@alibaba-inc.com>
alhridoy pushed a commit to alhridoy/vllm that referenced this pull request Oct 24, 2025
Signed-off-by: 许文卿 <xwq391974@alibaba-inc.com>
xuebwang-amd pushed a commit to xuebwang-amd/vllm that referenced this pull request Oct 24, 2025
Signed-off-by: 许文卿 <xwq391974@alibaba-inc.com>
Signed-off-by: xuebwang-amd <xuebwang@amd.com>
rtourgeman pushed a commit to rtourgeman/vllm that referenced this pull request Nov 10, 2025
Signed-off-by: 许文卿 <xwq391974@alibaba-inc.com>